10. Validators

Annotated Validators

使用 Annotated 上的 validator 去验证类型

from typing import Any, List
from typing_extensions import Annotated
from pydantic import BaseModel, ValidationError
from pydantic.functional_validators import AfterValidator

def check_squares(v: int) -> int:
    assert v**0.5 % 1 == 0, f'{v} is not a square number'
    return v

def double(v: Any) -> Any:
    return v * 2


MyNumber = Annotated[int, AfterValidator(double), AfterValidator(check_squares)]


class DemoModel(BaseModel):
    number: List[MyNumber]


print(DemoModel(number=[2, 8]))
#> number=[4, 16]
try:
    DemoModel(number=[2, 4])
except ValidationError as e:
    print(e)
    """
    1 validation error for DemoModel
    number
      Assertion failed, 8 is not a square number
    assert ((8 ** 0.5) % 1) == 0 [type=assertion_error, input_value=4, input_type=int]
    """

Before, After, Wrap and Plain validators

After validators 在 Pydantic 内部解析之后运行. They are generally more type safe and thus easier to implement.
Before validators 在内部解析之前运行. These are more flexible than After validators since they can modify the raw input, but they also have to deal with the raw input, which in theory could be any arbitrary object.
Plain validators are like a mode='before' validator 但它立即终止验证，不会调用任何其他验证器，pydantic 也不会调用它内部的验证逻辑
Wrap validators 是最复杂的. You can run code before or after Pydantic and other validators do their thing or you can terminate validation immediately, both with a successful value or an error. 你能添加多个 before, after, or mode='wrap' validators, 但 PlainValidator 只能有一个 since a plain validator will not call any inner validators. Here's an example of a mode='wrap' validator:

import json
from typing import Any, List

from typing_extensions import Annotated

from pydantic import (
    BaseModel,
    ValidationError,
    ValidationInfo,
    ValidatorFunctionWrapHandler,
)
from pydantic.functional_validators import WrapValidator


def maybe_strip_whitespace(
    v: Any, handler: ValidatorFunctionWrapHandler, info: ValidationInfo
) -> int:
    if info.mode == 'json':
        assert isinstance(v, str), 'In JSON mode the input must be a string!'
        # you can call the handler multiple times
        try:
            return handler(v)
        except ValidationError:
            return handler(v.strip())
    assert info.mode == 'python'
    assert isinstance(v, int), 'In Python mode the input must be an int!'
    # do no further validation
    return v


MyNumber = Annotated[int, WrapValidator(maybe_strip_whitespace)]


class DemoModel(BaseModel):
    number: List[MyNumber]


print(DemoModel(number=[2, 8]))
#> number=[2, 8]
print(DemoModel.model_validate_json(json.dumps({'number': [' 2 ', '8']})))
#> number=[2, 8]
try:
    DemoModel(number=['2'])
except ValidationError as e:
    print(e)
    """
    1 validation error for DemoModel
    number
      Assertion failed, In Python mode the input must be an int!
    assert False
     +  where False = isinstance('2', int) [type=assertion_error, input_value='2', input_type=str]
    """

The same "modes" apply to @field_validator, which is discussed in the next section.

`Annotated` 中 validator 的顺序

Annotated 中 validator 的顺序至关重要。验证从右到左，然后返回。

即：从右到左运行所有 before validators（或者 wrap），然后再从左到右运行所有 after validators

That is, it goes from right to left running all "before" validators (or calling into "wrap" validators), then left to right back out calling all "after" validators.

from typing import Any, Callable, List, cast

from typing_extensions import Annotated, TypedDict

from pydantic import (
    AfterValidator,
    BaseModel,
    BeforeValidator,
    PlainValidator,
    ValidationInfo,
    ValidatorFunctionWrapHandler,
    WrapValidator,
)
from pydantic.functional_validators import field_validator


class Context(TypedDict):
    logs: List[str]


def make_validator(label: str) -> Callable[[str, ValidationInfo], str]:
    def validator(v: Any, info: ValidationInfo) -> Any:
        context = cast(Context, info.context)
        context['logs'].append(label)
        return v

    return validator


def make_wrap_validator(
    label: str,
) -> Callable[[str, ValidatorFunctionWrapHandler, ValidationInfo], str]:
    def validator(
        v: Any, handler: ValidatorFunctionWrapHandler, info: ValidationInfo
    ) -> Any:
        context = cast(Context, info.context)
        context['logs'].append(f'{label}: pre')
        result = handler(v)
        context['logs'].append(f'{label}: post')
        return result

    return validator


class A(BaseModel):
    x: Annotated[
        str,
        BeforeValidator(make_validator('before-1')),
        AfterValidator(make_validator('after-1')),
        WrapValidator(make_wrap_validator('wrap-1')),
        BeforeValidator(make_validator('before-2')),
        AfterValidator(make_validator('after-2')),
        WrapValidator(make_wrap_validator('wrap-2')),
        BeforeValidator(make_validator('before-3')),
        AfterValidator(make_validator('after-3')),
        WrapValidator(make_wrap_validator('wrap-3')),
        BeforeValidator(make_validator('before-4')),
        AfterValidator(make_validator('after-4')),
        WrapValidator(make_wrap_validator('wrap-4')),
    ]
    y: Annotated[
        str,
        BeforeValidator(make_validator('before-1')),
        AfterValidator(make_validator('after-1')),
        WrapValidator(make_wrap_validator('wrap-1')),
        BeforeValidator(make_validator('before-2')),
        AfterValidator(make_validator('after-2')),
        WrapValidator(make_wrap_validator('wrap-2')),
        PlainValidator(make_validator('plain')),
        BeforeValidator(make_validator('before-3')),
        AfterValidator(make_validator('after-3')),
        WrapValidator(make_wrap_validator('wrap-3')),
        BeforeValidator(make_validator('before-4')),
        AfterValidator(make_validator('after-4')),
        WrapValidator(make_wrap_validator('wrap-4')),
    ]

    val_x_before = field_validator('x', mode='before')(
        make_validator('val_x before')
    )
    val_x_after = field_validator('x', mode='after')(
        make_validator('val_x after')
    )
    val_y_wrap = field_validator('y', mode='wrap')(
        make_wrap_validator('val_y wrap')
    )


context = Context(logs=[])

A.model_validate({'x': 'abc', 'y': 'def'}, context=context)
print(context['logs'])
"""
[
    'val_x before',
    'wrap-4: pre',
    'before-4',
    'wrap-3: pre',
    'before-3',
    'wrap-2: pre',
    'before-2',
    'wrap-1: pre',
    'before-1',
    'after-1',
    'wrap-1: post',
    'after-2',
    'wrap-2: post',
    'after-3',
    'wrap-3: post',
    'after-4',
    'wrap-4: post',
    'val_x after',
    'val_y wrap: pre',
    'wrap-4: pre',
    'before-4',
    'wrap-3: pre',
    'before-3',
    'plain',
    'after-3',
    'wrap-3: post',
    'after-4',
    'wrap-4: post',
    'val_y wrap: post',
]
"""

默认值的验证

如果有默认值，那么 validators 不会运行（无论是 @field_validator validators 还是 Annotated validators）。

使用 Field(validate_default=True) 来强行开启默认值验证；不过，更好的方法还是使用 @model_validator(mode='before') where the function is called before the inner validator is called.

from typing_extensions import Annotated

from pydantic import BaseModel, Field, field_validator


class Model(BaseModel):
    x: str = 'abc'
    y: Annotated[str, Field(validate_default=True)] = 'xyz'

    @field_validator('x', 'y')
    @classmethod
    def double(cls, v: str) -> str:
        return v * 2


print(Model())
#> x='abc' y='xyzxyz'
print(Model(x='foo'))
#> x='foofoo' y='xyzxyz'
print(Model(x='abc'))
#> x='abcabc' y='xyzxyz'
print(Model(x='foo', y='bar'))
#> x='foofoo' y='barbar'

Field validators

使用 @field_validator decorator 来验证指定字段

from pydantic import (
    BaseModel,
    ValidationError,
    ValidationInfo,
    field_validator,
)


class UserModel(BaseModel):
    name: str
    id: int

    @field_validator('name')
    @classmethod
    def name_must_contain_space(cls, v: str) -> str:
        if ' ' not in v:
            raise ValueError('must contain a space')
        return v.title()

    # you can select multiple fields, or use '*' to select all fields
    @field_validator('id', 'name')
    @classmethod
    def check_alphanumeric(cls, v: str, info: ValidationInfo) -> str:
        if isinstance(v, str):
            # info.field_name is the name of the field being validated
            is_alphanumeric = v.replace(' ', '').isalnum()
            assert is_alphanumeric, f'{info.field_name} must be alphanumeric'
        return v


print(UserModel(name='John Doe', id=1))
#> name='John Doe' id=1

try:
    UserModel(name='samuel', id=1)
except ValidationError as e:
    print(e)
    """
    1 validation error for UserModel
    name
      Value error, must contain a space [type=value_error, input_value='samuel', input_type=str]
    """

try:
    UserModel(name='John Doe', id='abc')
except ValidationError as e:
    print(e)
    """
    1 validation error for UserModel
    id
      Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='abc', input_type=str]
    """

try:
    UserModel(name='John Doe!', id=1)
except ValidationError as e:
    print(e)
    """
    1 validation error for UserModel
    name
      Assertion failed, name must be alphanumeric
    assert False [type=assertion_error, input_value='John Doe!', input_type=str]
    """

@field_validator 是类方法（class methods），所以它的首个参数是个 UserModel 类，而不是 UserModel 实例，建议在其下方添加 @classmethod 装饰器以获得正确的 type checking
第二个参数就是字段的值了
第三个参数是一个 pydantic.ValidationInfo 实例
validators 要不 return 一个处理过的值，要不 raise a ValueError or AssertionError (可能会用到 assert)
单个 validator 能覆盖到多个字段，只要提在装饰器里供多个字段名即可（使用 '*' 能覆盖到所有参数）

If you make use of assert statements, keep in mind that running Python with the -O optimization flag disables assert statements, and validators will stop working.

如果你想在 @field_validator 中拿到其他字段的信息，可以使用 ValidationInfo.data，它是一个 field name -> field value 的字典. 字段的验证会按照字段定义的顺序进行，所以访问 ValidationInfo.data 里的数据要小心了。for example, you would not be able to access info.data['id'] from within name_must_contain_space. 在大部分情况下，使用 @model_validator 来处理多字段是更好的选择

model validators

使用 @model_validator 来对整个 model 起验证作用

from typing import Any
from typing_extensions import Self
from pydantic import BaseModel, ValidationError, model_validator


class UserModel(BaseModel):
    username: str
    password1: str
    password2: str

    @model_validator(mode='before')
    @classmethod
    def check_card_number_omitted(cls, data: Any) -> Any:
        if isinstance(data, dict):
            assert (
                'card_number' not in data
            ), 'card_number should not be included'
        return data

    @model_validator(mode='after')
    def check_passwords_match(self) -> Self:
        pw1 = self.password1
        pw2 = self.password2
        if pw1 is not None and pw2 is not None and pw1 != pw2:
            raise ValueError('passwords do not match')
        return self


print(UserModel(username='scolvin', password1='zxcvbn', password2='zxcvbn'))
#> username='scolvin' password1='zxcvbn' password2='zxcvbn'
try:
    UserModel(username='scolvin', password1='zxcvbn', password2='zxcvbn2')
except ValidationError as e:
    print(e)
    """
    1 validation error for UserModel
      Value error, passwords do not match [type=value_error, input_value={'username': 'scolvin', '... 'password2': 'zxcvbn2'}, input_type=dict]
    """

try:
    UserModel(
        username='scolvin',
        password1='zxcvbn',
        password2='zxcvbn',
        card_number='1234',
    )
except ValidationError as e:
    print(e)
    """
    1 validation error for UserModel
      Assertion failed, card_number should not be included
    assert 'card_number' not in {'card_number': '1234', 'password1': 'zxcvbn', 'password2': 'zxcvbn', 'username': 'scolvin'} [type=assertion_error, input_value={'username': 'scolvin', '..., 'card_number': '1234'}, input_type=dict]
    """

Model validators 可以是 mode='before', mode='after' or mode='wrap'

Before model validators 传入一个 dict[str, Any] 不过也可以是个 model 实例 (e.g. if UserModel.model_validate(UserModel.construct(...)) is called)，或者其他任何任何乱七八糟的东西（取决于你在 model_validate 传入了啥），得益于此，mode='before' validators 异常强大，也异常复杂，容易出错。 Before model validators 需被定义为 class method

第一个参数是 cls
第二个参数就是 input (建议定义为 Any 类型，然后用 isinstance 去具体地判断类型）
第三个参数 (if present) 是 pydantic.ValidationInfo 实例

After model validators 是一个 instance methods，接收一个 model instance 作为第一个参数（self），返回一个 Self 它比 before 更容易实现，Since these are fully type safe，如果有任何字段验证出错了, mode='after' validators for that field will not be called.

在 validators 处理错误

你可以在 validator 中 raise either a ValueError or AssertionError (including ones generated by assert ... statements)；也可以选择 raise a PydanticCustomError，少一点简洁，多一点灵活。任何其他错误 (including TypeError) 都会冒出来，不会被封装在 ValidationError 中

from pydantic_core import PydanticCustomError
from pydantic import BaseModel, ValidationError, field_validator

class Model(BaseModel):
    x: int
    
    @field_validator('x')
    @classmethod
    def validate_x(cls, v: int) -> int:
        if v % 42 == 0:
            raise PydanticCustomError(
                'the_answer_error',
                '{number} is the answer!',
                {'number': v},
            )
        return v


try:
    Model(x=42 * 2)
except ValidationError as e:
    print(e)
    """
    1 validation error for Model
    x
      84 is the answer! [type=the_answer_error, input_value=84, input_type=int]
    """

特殊类型

Pydantic 提供了一些特殊类型来自定义验证过程

InstanceOf 类型能够验证 value 是否是给定类的实例

from typing import List
from pydantic import BaseModel, InstanceOf, ValidationError


class Fruit:
    def __repr__(self):
        return self.__class__.__name__

class Banana(Fruit):
    ...

class Apple(Fruit):
    ...

class Basket(BaseModel):
    fruits: List[InstanceOf[Fruit]]

print(Basket(fruits=[Banana(), Apple()]))
#> fruits=[Banana, Apple]
try:
    Basket(fruits=[Banana(), 'Apple'])
except ValidationError as e:
    print(e)
    """
    1 validation error for Basket
    fruits
      Input should be an instance of Fruit [type=is_instance_of, input_value='Apple', input_type=str]
    """

SkipValidation 类型帮助字段躲过验证

from typing import List
from pydantic import BaseModel, SkipValidation


class Model(BaseModel):
    names: List[SkipValidation[str]]


m = Model(names=['foo', 'bar'])
print(m)
#> names=['foo', 'bar']

m = Model(names=['foo', 123])  
print(m)
#> names=['foo', 123]

Field checks

在类创建过程中，validators 会被检查以确认它们指定的字段实际上存在于 model 中。

这个特性在一些场合可能是不被需要的，比如你的 validators 是用来验证仅出现在 model 子类中的字段

如果您希望在类创建过程中禁用这些检查，您可以将 check_fields=False 作为关键字参数传递给 validators。

Dataclass validators

Validators also work with Pydantic dataclasses.

from pydantic import field_validator
from pydantic.dataclasses import dataclass


@dataclass
class DemoDataclass:
    product_id: str  # should be a five-digit string, may have leading zeros

    @field_validator('product_id', mode='before')
    @classmethod
    def convert_int_serial(cls, v):
        if isinstance(v, int):
            v = str(v).zfill(5)
        return v


print(DemoDataclass(product_id='01234'))
#> DemoDataclass(product_id='01234')
print(DemoDataclass(product_id=2468))
#> DemoDataclass(product_id='02468')

Validation Context

你可以在 validation methods 中传入一个 context 关键词参数，在 validation 的 ValidationInfo 参数的 context 里能被拿到

from pydantic import BaseModel, ValidationInfo, field_validator

class Model(BaseModel):
    text: str

    @field_validator('text')
    @classmethod
    def remove_stopwords(cls, v: str, info: ValidationInfo):
        context = info.context
        if context:
            stopwords = context.get('stopwords', set())
            v = ' '.join(w for w in v.split() if w.lower() not in stopwords)
        return v


data = {'text': 'This is an example document'}
print(Model.model_validate(data))  # no context
#> text='This is an example document'
print(Model.model_validate(data, context={'stopwords': ['this', 'is', 'an']}))
#> text='example document'
print(Model.model_validate(data, context={'stopwords': ['document']}))
#> text='This is an example'

这在您需要在运行时动态更新验证行为时非常有用。例如，如果您希望一个字段具有动态可控的一组允许值，可以通过上下文传递允许的值，并有一个单独的机制来更新允许的内容。

from typing import Any, Dict, List

from pydantic import (
    BaseModel,
    ValidationError,
    ValidationInfo,
    field_validator,
)

_allowed_choices = ['a', 'b', 'c']


def set_allowed_choices(allowed_choices: List[str]) -> None:
    global _allowed_choices
    _allowed_choices = allowed_choices


def get_context() -> Dict[str, Any]:
    return {'allowed_choices': _allowed_choices}


class Model(BaseModel):
    choice: str

    @field_validator('choice')
    @classmethod
    def validate_choice(cls, v: str, info: ValidationInfo):
        allowed_choices = info.context.get('allowed_choices')
        if allowed_choices and v not in allowed_choices:
            raise ValueError(f'choice must be one of {allowed_choices}')
        return v


print(Model.model_validate({'choice': 'a'}, context=get_context()))
#> choice='a'

try:
    print(Model.model_validate({'choice': 'd'}, context=get_context()))
except ValidationError as exc:
    print(exc)
    """
    1 validation error for Model
    choice
      Value error, choice must be one of ['a', 'b', 'c'] [type=value_error, input_value='d', input_type=str]
    """

set_allowed_choices(['b', 'c'])

try:
    print(Model.model_validate({'choice': 'a'}, context=get_context()))
except ValidationError as exc:
    print(exc)
    """
    1 validation error for Model
    choice
      Value error, choice must be one of ['b', 'c'] [type=value_error, input_value='a', input_type=str]
    """

Similarly, you can use a context for serialization.

Using validation context with `BaseModel` initialization¶

Although there is no way to specify a context in the standard BaseModel initializer, you can work around this through the use of contextvars.ContextVar and a custom __init__ method:

from contextlib import contextmanager
from contextvars import ContextVar
from typing import Any, Dict, Iterator
from pydantic import BaseModel, ValidationInfo, field_validator

_init_context_var = ContextVar('_init_context_var', default=None)

@contextmanager
def init_context(value: Dict[str, Any]) -> Iterator[None]:
    token = _init_context_var.set(value)
    try:
        yield
    finally:
        _init_context_var.reset(token)

class Model(BaseModel):
    my_number: int

    def __init__(self, /, **data: Any) -> None:
        self.__pydantic_validator__.validate_python(
            data,
            self_instance=self,
            context=_init_context_var.get(),
        )

    @field_validator('my_number')
    @classmethod
    def multiply_with_context(cls, value: int, info: ValidationInfo) -> int:
        if info.context:
            multiplier = info.context.get('multiplier', 1)
            value = value * multiplier
        return value


print(Model(my_number=2))
#> my_number=2

with init_context({'multiplier': 3}):
    print(Model(my_number=2))
    #> my_number=6

print(Model(my_number=2))
#> my_number=2

复用 Validators

The following approach demonstrates how you can reuse a validator so that redundancy is minimized and the models become again almost declarative.

from pydantic import BaseModel, field_validator


def normalize(name: str) -> str:
    return ' '.join((word.capitalize()) for word in name.split(' '))


class Producer(BaseModel):
    name: str

    _normalize_name = field_validator('name')(normalize)


class Consumer(BaseModel):
    name: str

    _normalize_name = field_validator('name')(normalize)


jane_doe = Producer(name='JaNe DOE')
print(repr(jane_doe))
#> Producer(name='Jane Doe')
john_doe = Consumer(name='joHN dOe')
print(repr(john_doe))
#> Consumer(name='John Doe')

10. Validators

Annotated Validators​

Before, After, Wrap and Plain validators​

Annotated 中 validator 的顺序​

默认值的验证​

Field validators​

model validators​

在 validators 处理错误​

特殊类型​

Field checks​

Dataclass validators​

Validation Context​

Using validation context with BaseModel initialization¶​

复用 Validators​